# Choose the project root for the machine running the script (keyed on the
# login name), then move into the replication data folder for these studies.
project_root <- if (Sys.info()[["user"]] == "dbroock") {
  "~/Dropbox/Broockman-Kaufman-Lenz Roll Call Look Up and Heuristics Experiments/"
} else {
  "c:/gl/Dropbox/Roll Call Look Up/"
}
setwd(project_root)
# Relative to the project root chosen above.
setwd("Replication/Studies 3a(ii), 3b(ii), 4(ii)/data")

library(tidyverse)
require(xtable)
library(psych)

# Read the raw survey export, keep only rows where heur_success is TRUE, and
# drop the listed column ranges and individual fields.
dat <- read.csv("raw/heuristics_march2020_data_pii_removed.csv",
                stringsAsFactors = FALSE)
dat <- filter(dat, heur_success == "TRUE")
dat <- select(dat,
              -(understand_media:emotions_placebo2),
              -(v:trump_treat),
              -consent, -understandac, -ac, -employment,
              -heur_longname, -heur_success,
              -heur_otherparty)
# The candidate id becomes repcandidateid.
dat <- dplyr::rename(dat, repcandidateid = heur_candidateid)

# Merge in actual MC votes.
# Every column of the MC file gets a "rep" prefix, and any "vote" inside a
# column name becomes "actualvote". merge() keeps only respondents whose
# repcandidateid appears in the MC file.
votes <- read.csv("Ancillary Data/processed_MCs_for_api.csv",
                  stringsAsFactors = FALSE)
mc.names <- gsub("vote", "actualvote", names(votes))
names(votes) <- paste0("rep", mc.names)
dat <- merge(dat, votes, by = "repcandidateid")

# House vote IDs used to label the survey's vote columns.
# NOTE(review): the IDs are assumed to be in the same order as columns 24:40
# (own views) and 42:58 (guesses about the MC); the spot check below prints
# each pairing so it can be verified by eye.
vote.ids <- c("55735", "56612", "59180", "59438", "58622", "58745",
              "59189", "59434", "60053", "60590", "60787", "59737",
              "59632", "60153", "60587", "61788", "61746")

# Spot check: print each vote ID next to the two column names it will replace.
spot.check <- data.frame(vote.ids, names(dat)[24:40], names(dat)[42:58])
for (row in seq_len(nrow(spot.check))) {
  print(spot.check[row, ])
}

# Rename the positional columns so that they carry the vote IDs.
names(dat)[24:40] <- paste0("ownview", vote.ids)      # own views
names(dat)[42:58] <- paste0("repvoteperc", vote.ids)  # MOC vote guesses

# Recode the answer text to 0/1. Text that matches neither label is left
# as-is by recode() and then becomes NA in as.numeric().
own.codes <- c("AGAINST" = "0", "FOR" = "1")
for (col in 24:40) {
  dat[[col]] <- as.numeric(recode(dat[[col]], !!!own.codes))
}
guess.codes <- c("${e://Field/heur_shortname} voted NO" = "0",
                 "${e://Field/heur_shortname} voted YES" = "1")
for (col in 42:58) {
  dat[[col]] <- as.numeric(recode(dat[[col]], !!!guess.codes))
}

# Data at the level of votes/SIGs potentially shown.
# Read the vote/group map and collect, as character vectors, the house vote
# IDs assigned to split "a" and to split "b".
vote.group.map <- read.csv("Ancillary Data/vote_and_group_info_map.csv",
                           stringsAsFactors = FALSE)
a.votes <- vote.group.map %>%
  filter(split == "a") %>%
  pull(house_vote_id) %>%
  as.character()
b.votes <- vote.group.map %>%
  filter(split == "b") %>%
  pull(house_vote_id) %>%
  as.character()
  

## Create new variables that record whether respondents guessed correctly.
# repvoteperc* holds the respondent's guess recoded to 0/1 (NO = 0, YES = 1).
# NOTE(review): the == comparison below is only meaningful if repactualvote*
# (read from the MC file and never recoded in this script) uses the same 0/1
# coding -- confirm.

# Guard: `dat$survey_version` is NULL when that column is absent, which would
# silently turn the "weren't asked" masking below into a no-op.
if (is.null(dat$survey_version)) {
  warning("dat has no 'survey_version' column; votes that were not asked ",
          "will not be set to NA.", call. = FALSE)
}

for (vote in vote.ids) {
  guess.col <- paste0("repvoteperc", vote)
  actual.col <- paste0("repactualvote", vote)
  correct.col <- paste0("repperccorrect", vote)

  dat[, correct.col] <- as.numeric(dat[, guess.col] == dat[, actual.col])
  dat[is.na(dat[, guess.col]), correct.col] <- 0    # If you didn't guess, you don't know.
  dat[is.na(dat[, actual.col]), correct.col] <- NA  # If there's no vote, you can't know.

  # If you weren't asked, you can't have known: votes in split "a" are blanked
  # for survey version "b" and vice versa.
  if (vote %in% a.votes) dat[dat$survey_version == "b", correct.col] <- NA
  if (vote %in% b.votes) dat[dat$survey_version == "a", correct.col] <- NA
}


# Knowledge battery
# Each item is scored 1 when the response equals the keyed answer and 0
# otherwise (an NA response stays NA).
knowledge.key <- c(
  knowledge_controlhouse = "Democrats",
  knowledge_roberts = "Chief Justice of the US Supreme Court",
  knowledge_senatorterms = "Six (6)",
  knowledge_doddfrank = "Finance"
)
for (item in names(knowledge.key)) {
  dat[[item]] <- ifelse(dat[[item]] == knowledge.key[[item]], 1, 0)
}
cor(select(dat, starts_with("knowledge_"))) # spot check
# The scale is the sum of the four scored items; the items are then dropped.
dat$knowledgescale <- Reduce(`+`, dat[names(knowledge.key)])
dat <- select(dat, -starts_with("knowledge_"))

# Ideology: 1 (very liberal) to 5 (very conservative);
# "Don't know/ None of the above" shares the midpoint with "Moderate".
ideo.codes <- c("Very liberal" = "1",
                "Liberal" = "2",
                "Moderate" = "3",
                "Don't know/ None of the above" = "3",
                "Conservative" = "4",
                "Very conservative" = "5")
dat$ideo <- as.numeric(recode(dat$ideo, !!!ideo.codes))

# Pres 2016: Trump = 1, Clinton = 3, every other non-missing answer = 2.
pres2016.codes <- c("Donald Trump (Republican)" = "1",
                    "Hillary Clinton (Democrat)" = "3")
dat$pres2016 <- as.numeric(recode(dat$pres2016, !!!pres2016.codes,
                                  .default = "2"))

# US House Generic: Democratic = 1, not sure = 2, Republican = 3.
house.codes <- c("The Democratic candidate" = "1",
                 "I'm not sure" = "2",
                 "The Republican candidate" = "3")
dat$ushousegeneric <- as.numeric(recode(dat$ushousegeneric, !!!house.codes))

# Pre-treat favs
# Drop the Brad Pitt, Taylor Swift and LeBron James favorability items.
# NOTE(review): select() previously listed the Brad Pitt item twice; the
# repeat was a no-op and has been removed. If a different item was meant to be
# dropped as well, add it here.
dat <- select(dat,
              -pretreat.fav...Actor.Brad.Pitt,
              -pretreat.fav...Singer.Taylor.Swift,
              -pretreat.fav...Basketball.player.LeBron.James)

# Recode every remaining pretreat.fav.. item to a 1-7 scale
# (7 = strongly favorable). Unlisted text becomes NA in as.numeric().
fav.codes <- c("Strongly favorable" = "7",
               "Favorable" = "6",
               "Slightly favorable" = "5",
               "Neither favorable nor unfavorable" = "4",
               "Slightly unfavorable" = "3",
               "Unfavorable" = "2",
               "Strongly unfavorable" = "1")
for (item in tidyselect::vars_select(names(dat), starts_with("pretreat.fav.."))) {
  dat[[item]] <- as.numeric(recode(dat[[item]], !!!fav.codes))
}


# RWA
# Recode each rwa.. item to 1-5 (5 = strongly agree), average the items into
# a single `rwa` score per respondent, then drop the individual items.
rwa.items <- tidyselect::vars_select(names(dat), starts_with("rwa.."))
rwa.codes <- c("Strongly agree" = "5",
               "Somewhat agree" = "4",
               "Neither agree nor disagree" = "3",
               "Somewhat disagree" = "2",
               "Strongly disagree" = "1")
for (item in rwa.items) {
  dat[[item]] <- as.numeric(recode(dat[[item]], !!!rwa.codes))
}
# rowMeans() is called without na.rm, so any missing item makes the score NA.
dat$rwa <- rowMeans(dat[, rwa.items])
dat <- select(dat, -starts_with("rwa.."))

# Economy
# Own situation and current US economy: 1 (poor) to 4 (excellent);
# "Would rather not say" is placed at 2.5.
econ.codes <- c("Excellent" = "4",
                "Good" = "3",
                "Only fair" = "2",
                "Poor" = "1",
                "Would rather not say" = "2.5")
for (item in c("econ_own_situation", "econ_us_now")) {
  dat[[item]] <- as.numeric(recode(dat[[item]], !!!econ.codes))
}

# Expected direction of the US economy: 1 (worse) to 3 (better).
future.codes <- c("Better" = "3",
                  "Just about the same" = "2",
                  "Worse" = "1")
dat$econ_us_future <- as.numeric(recode(dat$econ_us_future, !!!future.codes))

# Pre-treatment Trump support
# A 1-7 score built from three survey items. The rules are applied from top
# to bottom, so a later rule overwrites an earlier one for any respondent who
# matches both; respondents who match no rule stay NA.
trump.vote.rules <- list(
  list(item = "trump_support_strength", answer = "Strongly", score = 7),
  list(item = "trump_support_strength", answer = "Not so strongly", score = 6),
  list(item = "horseracelean", answer = "Lean towards Republican Donald Trump", score = 5),
  list(item = "horseracelean", answer = "Completely undecided", score = 4),
  list(item = "horseracelean", answer = "Lean towards the Democratic candidate for President", score = 3),
  list(item = "dem_support_strength", answer = "Not so strongly", score = 2),
  list(item = "dem_support_strength", answer = "Strongly", score = 1)
)
dat$pretreat_trump_vote <- NA
for (rule in trump.vote.rules) {
  matched <- dat[[rule$item]] == rule$answer
  dat$pretreat_trump_vote[matched] <- rule$score
}
# The source items are no longer needed once the score exists.
dat <- select(dat, -trump_support_strength, -dem_support_strength,
              -horseracelean, -horserace)

# Trump fav: 1 (extremely unfavorable) to 7 (extremely favorable).
trump.fav.codes <- c("Extremely favorable" = "7",
                     "Moderately favorable" = "6",
                     "Slightly favorable" = "5",
                     "Neither favorable nor unfavorable" = "4",
                     "Slightly unfavorable" = "3",
                     "Moderately unfavorable" = "2",
                     "Extremely unfavorable" = "1")
dat$trump.fav <- as.numeric(recode(dat$trump.fav, !!!trump.fav.codes))

# Trump approve: 1 (strongly disapprove) to 7 (strongly approve).
trump.approve.codes <- c("Strongly approve" = "7",
                         "Approve" = "6",
                         "Somewhat approve" = "5",
                         "Neither approve nor disapprove" = "4",
                         "Somewhat disapprove" = "3",
                         "Disapprove" = "2",
                         "Strongly disapprove" = "1")
dat$trump.approve <- as.numeric(recode(dat$trump.approve, !!!trump.approve.codes))


# DVs
# MC approval: 1 (strongly disapprove) to 7 (strongly approve); missing
# values are replaced by the mean of the observed values.
mc.approve.codes <- c("Strongly approve" = "7",
                      "Approve" = "6",
                      "Somewhat approve" = "5",
                      "Neither approve nor disapprove" = "4",
                      "Somewhat disapprove" = "3",
                      "Disapprove" = "2",
                      "Strongly disapprove" = "1")
dat$mcapprove <- as.numeric(recode(dat$mcapprove, !!!mc.approve.codes))
dat$mcapprove <- replace(dat$mcapprove, is.na(dat$mcapprove),
                         mean(dat$mcapprove, na.rm = TRUE))

# MC favorability: 1 (extremely unfavorable) to 7 (extremely favorable);
# missing values are replaced by the observed mean in the same way.
mc.fav.codes <- c("Extremely favorable" = "7",
                  "Moderately favorable" = "6",
                  "Slightly favorable" = "5",
                  "Neither favorable nor unfavorable" = "4",
                  "Slightly unfavorable" = "3",
                  "Moderately unfavorable" = "2",
                  "Extremely unfavorable" = "1")
dat$mcfavorability <- as.numeric(recode(dat$mcfavorability, !!!mc.fav.codes))
dat$mcfavorability <- replace(dat$mcfavorability, is.na(dat$mcfavorability),
                              mean(dat$mcfavorability, na.rm = TRUE))

# Generic ballot: -2 to 2, starting from 0. The answer labels are the literal
# placeholder strings stored in the export. Rules run top to bottom, so a
# later rule overwrites an earlier one when both match.
ballot.rules <- list(
  list(item = "X2018generic", answer = "${e://Field/heur_longname}", score = 2),
  list(item = "X2018genericlean", answer = "${e://Field/heur_shortname}", score = 1),
  list(item = "X2018genericlean", answer = "Completely undecided", score = 0),
  list(item = "X2018genericlean", answer = "The ${e://Field/heur_otherparty} that runs against them", score = -1),
  list(item = "X2018generic", answer = "The ${e://Field/heur_otherparty} that runs against them", score = -2)
)
dat$genericballot <- 0
for (rule in ballot.rules) {
  matched <- dat[[rule$item]] == rule$answer
  dat$genericballot[matched] <- rule$score
}
dat <- select(dat, -X2018generic, -X2018genericlean)

# Combine the three DVs into one rating scale: scores on a single component
# from psych::principal().
rating.items <- c("mcapprove", "mcfavorability", "genericballot")
fit <- principal(dat[, rating.items], nfactors = 1, rotate = "varimax")
dat$mcratingscale <- fit$scores[, 1]


# Clean Lucid variables
# The code -3105 is replaced with 4 for education and 10 for hhi.
# NOTE(review): -3105 looks like a missing/declined sentinel -- confirm.
dat$education <- replace(dat$education, dat$education == -3105, 4)
dat$hhi <- replace(dat$hhi, dat$hhi == -3105, 10)

# Collapse ethnicity codes: 4-14 -> 4, 15-16 -> 5, and any respondent with a
# hispanic code of 2-14 -> 6 (this last rule overrides the other categories).
ethnicity.code <- as.numeric(dat$ethnicity)
ethnicity.code[ethnicity.code >= 4 & ethnicity.code <= 14] <- 4
ethnicity.code[ethnicity.code %in% c(15, 16)] <- 5

hispanic.code <- as.numeric(dat$hispanic)
dat$hispanic <- hispanic.code
ethnicity.code[hispanic.code >= 2 & hispanic.code <= 14] <- 6
dat$ethnicity <- ethnicity.code

# Party codes 6-10 are shifted down to 3-7; other codes are left unchanged.
party.codes <- c("6" = "3", "7" = "4", "8" = "5", "9" = "6", "10" = "7")
dat$political_party <- as.numeric(
  recode(as.character(dat$political_party), !!!party.codes)
)

# Text labels for the collapsed ethnicity codes.
ethnicity.labels <- c("1" = "White",
                      "2" = "Black",
                      "3" = "AmericanIndian",
                      "4" = "Asian",
                      "5" = "Other",
                      "6" = "Hispanic")
dat$ethnicity_str <- recode(as.character(dat$ethnicity), !!!ethnicity.labels)


# Controls for precision
# 1 when the MC's one-letter party is "R", 0 otherwise.
dat$MC_republican <- as.numeric(dat$reppartyoneletter == "R")

# One logical indicator column per observed ethnicity label.
for (group in unique(dat$ethnicity_str)) {
  dat[[paste0("ethnicity_ind_", group)]] <- dat$ethnicity_str == group
}

# Names of all control variables; White is the omitted ethnicity category.
partisan.controls <- setdiff(
  as.character(c(
    "ideo", "pres2016", "ushousegeneric",
    tidyselect::vars_select(names(dat), starts_with("pretreat.fav...")),
    tidyselect::vars_select(names(dat), starts_with("ethnicity_ind_")),
    "econ_us_future", "econ_us_now", "econ_own_situation",
    "trump.fav", "trump.approve", "gender", "political_party", "rwa"
  )),
  "ethnicity_ind_White"
)

# For each control: coerce to numeric, add a mean-centered copy (control_*),
# and add that copy multiplied by +1 for Republican MCs and -1 otherwise
# (control_MCpartyX*).
party.sign <- ifelse(dat$MC_republican, 1, -1)
for (v in partisan.controls) {
  centered.col <- paste0("control_", v)
  dat[, v] <- as.numeric(dat[, v])
  dat[, centered.col] <- dat[, v] - mean(dat[, v], na.rm = TRUE)
  dat[, paste0("control_MCpartyX", v)] <- party.sign * dat[, centered.col]
}
dat$control_MC_republican <- dat$MC_republican

# Write data for heuristics project: the wide-format cleaned data, saved
# without row names.
write.csv(dat, file = "cleaned/heuristics_cleaned_wide.csv", row.names = FALSE)


# Next, write "long" data at vote by person level.
# This is used in Study 3.

# For every (SIG, house vote) pair in the map, make missingness symmetric:
# If rep vote is missing, need to set SIG rating to NA since it would not be used
# If SIG rating is missing, need to set rep vote to NA since it would not be used
# The second step sees the NAs written by the first, so the order matters.
votes <- read.csv("Ancillary Data/vote_and_group_info_map.csv",
                  stringsAsFactors = FALSE)
# seq_len() so that an empty map yields zero iterations (1:0 would run twice).
for (i in seq_len(nrow(votes))) {
  sig_id <- votes$sig_id[i]
  house_vote_id <- votes$house_vote_id[i]
  rating.col <- paste0("reprating", sig_id)
  vote.col <- paste0("repactualvote", house_vote_id)

  dat[is.na(dat[, vote.col]), rating.col] <- NA
  dat[is.na(dat[, rating.col]), vote.col] <- NA
}

# Return the last `n` characters of each string in `x`; used to pull the
# trailing vote ID off a variable name. Vectorised over `x`. A string shorter
# than `n` is returned whole.
substrRight <- function(x, n) {
  last <- nchar(x)
  first <- last - n + 1
  substr(x, first, last)
}

# This first melt call produces a respondent * issue data frame with the
# respondent's guess regarding their Congressmember's vote.
# `id` is a row-number respondent identifier; seq_len() keeps it valid even
# for a zero-row data frame.
dat$id <- seq_len(nrow(dat))
guesses <- select(dat, starts_with("repvoteperc"), id) %>%
  reshape2::melt(id.vars = "id")
guesses$variable <- as.character(guesses$variable)
# The issue is the trailing five-character vote ID of the column name.
# substrRight() is vectorised, so it is applied to the whole column at once.
guesses$issue <- substrRight(guesses$variable, 5)
guesses <- guesses %>%
  dplyr::rename(guess = value) %>%
  select(-variable)

# This second melt call produces a respondent * issue data frame
# with their Congressmember's actual vote.
rep.actual.votes <- select(dat, starts_with("repactualvote"), id) %>%
  reshape2::melt(id.vars = "id")
rep.actual.votes$variable <- as.character(rep.actual.votes$variable)
rep.actual.votes$issue <- substrRight(rep.actual.votes$variable, 5)
rep.actual.votes <- rep.actual.votes %>%
  dplyr::rename(truth = value) %>%
  select(-variable)

# Merge the two long data frames together along with an identifier for
# which issue area the respondent received a SIG rating for.
# Join keys are spelled out so that a stray shared column name can never
# silently change what the merges match on.
dat_long <- merge(guesses, rep.actual.votes, by = c("id", "issue"))
respondent.info <- select(dat, id, heur_randomrating_house_vote_id,
                          heur_num_eligible_ratings, heur_survey_version,
                          knowledgescale)
dat_long <- merge(dat_long, respondent.info, by = "id")

# Calculate the dependent variable: whether the respondent guessed the MoC's vote correctly on each issue.
dat_long$correct <- dat_long$guess == dat_long$truth

# NAs come from either MCs not voting or the issue being in the wrong group. We expect just over 50% of NAs.
dat_long <- filter(dat_long, !is.na(correct))

# Create variables indicating which treatment group each observation is in:
# `direct` is TRUE when the row's issue equals the respondent's
# heur_randomrating_house_vote_id (compared as character strings).
dat_long$heur_randomrating_house_vote_id <-
  as.character(dat_long$heur_randomrating_house_vote_id)
dat_long$direct <- dat_long$issue == dat_long$heur_randomrating_house_vote_id

# Merge in number of candidates variable
dat_long <- left_join(dat_long, select(dat, heur_num_candidates, id), by = "id")

write.csv(dat_long, "cleaned/heuristics_cleaned_long.csv", row.names = FALSE)
